Packages used
library(ape)
library(ggtree)
## ggtree v1.14.4 For help: https://guangchuangyu.github.io/software/ggtree
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
##
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, accepted. doi: 10.1093/molbev/msy194
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
##
## rotate
library(phangorn)
library(phytools)
## Loading required package: maps
##
## Attaching package: 'phytools'
## The following objects are masked from 'package:ggtree':
##
## read.newick, reroot
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(heatmaply)
## Loading required package: viridis
## Loading required package: viridisLite
##
## ======================
## Welcome to heatmaply version 0.15.2
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## ======================
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.9.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:phytools':
##
## untangle
## The following object is masked from 'package:ggtree':
##
## rotate
## The following objects are masked from 'package:ape':
##
## ladderize, rotate
## The following object is masked from 'package:stats':
##
## cutree
library(vegan)
## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
##
## shuffle
## Loading required package: lattice
## This is vegan 2.5-3
##
## Attaching package: 'vegan'
## The following objects are masked from 'package:phangorn':
##
## diversity, treedist
library(corrgram)
##
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
##
## panel.fill
library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:plotly':
##
## subplot
## The following object is masked from 'package:ape':
##
## zoom
## The following objects are masked from 'package:base':
##
## format.pval, units
library(corrplot)
## corrplot 0.84 loaded
Build heatmap with phylogenetic tree attached
tree_rooted <- read.newick("input_files/ConcatAlnRooted.tre")
matrix_file <- read.table(file = "input_files/PresAbsSummary.txt",
header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = "\t")
order_mat <- read.table(file = "input_files/reorder.txt", header = TRUE, row.names = 1,
stringsAsFactors = FALSE, sep = "\t")
intermediate_tree <- ggtree(tree_rooted, branch.length = T, ladderize = TRUE) +
geom_tiplab(size = 2.5, linetype = "dashed", linesize = 0.1, align = TRUE) +
geom_point2(aes(subset = as.numeric(label)>79 & !isTip), color = "firebrick", size = 1) +
geom_treescale(x = -0.01, y = -5, offset = 2.5) +
theme(legend.position="none")
## Warning: Duplicated aesthetics after name standardisation: size
alpha <- gheatmap(intermediate_tree, matrix_file, offset = 0.7, width = 2.5, colnames = F) +
scale_fill_manual(values = c("lightgrey","black")) +
theme(legend.position="none") +
theme(text = element_text(size=20))
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.
bet <- get_heatmap_column_position(alpha, by = "bottom")
top <- get_heatmap_column_position(alpha, by = "top")
alpha +
geom_text(data = bet, aes(x, y, label = label), nudge_y = -0.5, size = 2.5, color = "red", angle = 90)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

ggsave("out_files/heattree.pdf", height = 20, width = 10, dpi = 300)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## optional interactive heatmap
x <- as.phylo(intermediate_tree)
x <- force.ultrametric(x, method = 'nnls')
row_dend <- x %>% as.dendrogram
heatmaply(order_mat, file = "out_files/heatmaply_plot.html", height = 1200, width = 400, fontsize_row = 4,
hide_colorbar = TRUE, dendrogram = 'row', Rowv = row_dend, margins = c(100,90))
Plot genomes per homologous group
### genome counts in bar plot
counts <- read.delim("input_files/cHGperGenome.txt")
ggplot(data = counts, aes(x = reorder(alpha_hg, -No..of.Genomes), y = No..of.Genomes)) +
geom_bar(stat = "identity", color="black", fill="steelblue") +
theme_bw() +
xlab("cHG") +
ylab("No. of genomes") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_y_continuous(expand = c(0,0), limits = c(0,180), breaks = seq(0,180, by = 30)) +
theme(axis.text.x = element_text(angle = 90))

ggsave("out_files/cHGperGenome.pdf", width = 6, height = 6, dpi = 500)
Plot rarefaction curve
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt",
stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")
t_pan_matrix <- t(pan_matrix)
t_sp <- specaccum(t_pan_matrix,method="random",permutations=100)
#pdf('out_files/rarefaction_plot.pdf', width = 7, height = 7)
par(las = 1)
plot(t_sp, ci.type = "poly", col = "blue",lwd = 2, ci.lty = 0,
ci.col = "lightblue", xlab = "Gene families", ylab= "No. of taxa")
boxplot(t_sp,col = "yellow", add = TRUE)

Plot spearman correlations
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt",
stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")
in_matrix <- as.matrix(pan_matrix)
spearman_rcorr <- rcorr(in_matrix, type = "spearman")
#write.table(spearman_rcorr$P, file = "out_files/spearman_p.txt", sep = "\t", row.names = TRUE, col.names = TRUE)
#write.table(spearman_rcorr$r, file = "out_files/spearman_r.txt", sep = "\t", row.names = TRUE, col.names = TRUE)
#### apply bonferroni correction for 48x48 pairwise comparisons
bonf_corr_p <- spearman_rcorr$P
spear_r <- spearman_rcorr$r
bonf_corr_p[is.na(bonf_corr_p)] <- 0
y <- function(x) ifelse(x*1128<1, x*1128, 1)
bonf_corr_p[] <- vapply(bonf_corr_p, y, numeric(1))
bonfer_matrix <- ifelse(bonf_corr_p<1, spear_r, 0)
#### output plot
#pdf("out_files/corrplot.pdf", height = 6, width = 6)
corrplot(bonfer_matrix, type = "lower", order = "FPC", tl.cex = 0.5, tl.srt = 30, tl.col = "black")
